package com.novel.crawl.bxwx9.service.impl;

import com.novel.crawl.bxwx9.service.CrawlService;
import com.novel.crawl.bxwx9.spider.processor.NovelProcessor;
import com.novel.crawl.common.entity.Book;
import com.novel.crawl.common.service.BookService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;

import java.util.List;
import java.util.stream.Collectors;

/**
 * {@link CrawlService} implementation that hands stored books, and the book-list start page,
 * to the injected WebMagic {@link Spider} instances.
 *
 * @author 奔波儿灞
 * @since 1.0
 */
@Service
public class CrawlServiceImpl implements CrawlService {

    private static final Logger LOG = LoggerFactory.getLogger(CrawlServiceImpl.class);

    /** Maximum number of books requested from {@link BookService} per batch. */
    private static final int DEFAULT_SIZE = 200;

    /**
     * Id used as the starting point of the first batch query.
     * NOTE(review): looks like an all-zero 24-hex-digit id chosen to sort before every real id;
     * confirm against the id scheme used by {@link Book}.
     */
    private static final String DEFAULT_BEGIN_ID = "000000000000000000000000";

    /** URL added to {@link #bookSpider} when the book list is crawled. */
    private static final String DEFAULT_BOOK_START_URL = "https://www.bxwx9.org/bsort/0/1.htm";

    @Autowired
    private BookService bookService;

    // NOTE(review): two Spider fields are injected; presumably each is matched to a bean by
    // field name. Confirm against the spider configuration.
    @Autowired
    private Spider spider;

    @Autowired
    private Spider bookSpider;

    /**
     * Crawls all books of source {@code Book.SOURCE_TYPE_ME} batch by batch.
     *
     * <p>Each batch is fetched from {@link BookService}, handed to {@link #spider}, and the id of
     * the last book in the batch is then used as the starting id of the next query. The loop ends
     * as soon as a query yields {@code null} or an empty list.
     */
    @Override
    public void crawl() {
        List<Book> books = findBatch(DEFAULT_BEGIN_ID);
        while (!CollectionUtils.isEmpty(books)) {
            // Crawl the current batch.
            doCrawl(books);
            // Query the next batch, continuing from the last book of the current one.
            String lastBookId = books.get(books.size() - 1).getId();
            books = findBatch(lastBookId);
        }
    }

    /**
     * Runs {@link #bookSpider} starting from {@link #DEFAULT_BOOK_START_URL}.
     */
    @Override
    public void crawlBook() {
        bookSpider.addUrl(DEFAULT_BOOK_START_URL).run();
    }

    /**
     * Fetches one batch of books and logs the starting id together with the batch size.
     *
     * <p>The size is computed null-safely so that a {@code null} result reaches the caller's
     * {@code CollectionUtils.isEmpty} check instead of failing inside the log statement.
     *
     * @param beginBookId id passed to the service as the start of the batch
     * @return whatever the service returned, possibly {@code null} or empty
     */
    private List<Book> findBatch(String beginBookId) {
        List<Book> books =
                bookService.findBatchByBookIdStartAndSource(beginBookId, Book.SOURCE_TYPE_ME, DEFAULT_SIZE);
        int bookNum = books == null ? 0 : books.size();
        LOG.info("batch crawl book, beginBookId: {}, book num: {}", beginBookId, bookNum);
        return books;
    }

    /**
     * Converts every book into a request and runs {@link #spider} over those requests.
     *
     * @param books non-empty batch of books to crawl
     */
    private void doCrawl(List<Book> books) {
        // Attach each book to its request so the processor can read it back from the extras.
        List<Request> requests = books.stream()
                .map(this::toRequest)
                .collect(Collectors.toList());
        spider.startRequest(requests).run();
    }

    /**
     * Builds a request for the book's URL carrying the book itself as an extra.
     *
     * @param book book to crawl
     * @return request whose extra {@code NovelProcessor.ITEM_BOOK} is {@code book}
     */
    private Request toRequest(Book book) {
        Request request = new Request(book.getUrl());
        request.putExtra(NovelProcessor.ITEM_BOOK, book);
        return request;
    }

}
